import json
import re

# Extract every distinct "@DP_xxx" mention from train.json, print the sorted
# list, and save it one-per-line to a text file next to the input.

# Input conversations and output list locations.
INPUT_PATH = "D:\\yuanbei\\客服问答机器人需求表\\清洗数据\\data_process\\train.json"
OUTPUT_PATH = "D:\\yuanbei\\客服问答机器人需求表\\清洗数据\\data_process\\dp_usernames.txt"

# Matches "@DP_" followed by one or more ASCII letters/digits. IGNORECASE lets
# the "@DP_" prefix itself match in any letter case (e.g. "@dp_abc").
pattern = re.compile(r"@DP_[A-Za-z0-9]+", re.IGNORECASE)


def extract_dp_usernames(data):
    """Return the sorted, de-duplicated @DP_ mentions found in *data*.

    Args:
        data: An iterable of conversations, each an iterable of message
            dicts. Only the ``"content"`` value of each message is scanned.

    Returns:
        A list of the distinct matched strings (original letter case kept),
        in ascending ``sorted()`` order.

    Messages that are not dicts, or whose ``"content"`` is missing or not a
    string (e.g. JSON ``null``), are skipped instead of raising.
    """
    found = set()
    for pair in data:
        for msg in pair:
            if not isinstance(msg, dict):
                continue
            content = msg.get("content")
            # A present-but-null "content" would make findall() raise.
            if isinstance(content, str):
                found.update(pattern.findall(content))
    return sorted(found)


def main():
    """Load the training data, report the @DP_ mentions, and save them."""
    # 1. Read the train.json file.
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    # 2-3. Collect all @DP_xxx matches and print them in sorted order.
    usernames = extract_dp_usernames(data)
    print(f"共提取到 {len(usernames)} 个 @DP_ 用户：\n")
    print(usernames)

    # 4. Save the list to a file, one name per line.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in usernames)

    # Report the path actually written (the old message named an unrelated
    # /mnt/data location).
    print(f"\n✅ 用户名列表已保存到 {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
